{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "857bc9ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "df8c5b59",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(52002, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_json(\"alpaca_data.json\")\n",
    "df.columns = [\"question\",\"context\",\"ideal_answer\"]\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "7ed87ede",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['question', 'context', 'ideal_answer'], dtype='object')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c2c4fb64",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>question</th>\n",
       "      <th>context</th>\n",
       "      <th>ideal_answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Identify the odd one out.</td>\n",
       "      <td>Twitter, Instagram, Telegram</td>\n",
       "      <td>Telegram</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Explain why the following fraction is equivale...</td>\n",
       "      <td>4/16</td>\n",
       "      <td>The fraction 4/16 is equivalent to 1/4 because...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Evaluate this sentence for spelling and gramma...</td>\n",
       "      <td>He finnished his meal and left the resturant</td>\n",
       "      <td>He finished his meal and left the restaurant.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Classify the following into animals, plants, a...</td>\n",
       "      <td>Oak tree, copper ore, elephant</td>\n",
       "      <td>Oak tree: Plant\\n Copper ore: Mineral\\n Elepha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Reverse engineer this code to create a new ver...</td>\n",
       "      <td>def factorialize(num):\\n  factorial = 1\\n  for...</td>\n",
       "      <td>def factorialize(num):\\n  factorial = 1\\n  for...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20674</th>\n",
       "      <td>Create a classification algorithm for a given ...</td>\n",
       "      <td>Dataset of medical images</td>\n",
       "      <td>We can use a convolutional neural network (CNN...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20675</th>\n",
       "      <td>Complete the following sentence by inserting a...</td>\n",
       "      <td>The sun was ____ and the sky was ____.</td>\n",
       "      <td>The sun was shining and the sky was clear.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20676</th>\n",
       "      <td>Arrange the items given below in the order to ...</td>\n",
       "      <td>cake, me, eating</td>\n",
       "      <td>I eating cake.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20677</th>\n",
       "      <td>Write an introductory paragraph about a famous...</td>\n",
       "      <td>Michelle Obama</td>\n",
       "      <td>Michelle Obama is an inspirational woman who r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20678</th>\n",
       "      <td>Analyze the given legal document and explain t...</td>\n",
       "      <td>The following is an excerpt from a contract be...</td>\n",
       "      <td>This legal document states that Company A has ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20679 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                question  \\\n",
       "0                              Identify the odd one out.   \n",
       "1      Explain why the following fraction is equivale...   \n",
       "2      Evaluate this sentence for spelling and gramma...   \n",
       "3      Classify the following into animals, plants, a...   \n",
       "4      Reverse engineer this code to create a new ver...   \n",
       "...                                                  ...   \n",
       "20674  Create a classification algorithm for a given ...   \n",
       "20675  Complete the following sentence by inserting a...   \n",
       "20676  Arrange the items given below in the order to ...   \n",
       "20677  Write an introductory paragraph about a famous...   \n",
       "20678  Analyze the given legal document and explain t...   \n",
       "\n",
       "                                                 context  \\\n",
       "0                           Twitter, Instagram, Telegram   \n",
       "1                                                   4/16   \n",
       "2           He finnished his meal and left the resturant   \n",
       "3                         Oak tree, copper ore, elephant   \n",
       "4      def factorialize(num):\\n  factorial = 1\\n  for...   \n",
       "...                                                  ...   \n",
       "20674                          Dataset of medical images   \n",
       "20675             The sun was ____ and the sky was ____.   \n",
       "20676                                   cake, me, eating   \n",
       "20677                                     Michelle Obama   \n",
       "20678  The following is an excerpt from a contract be...   \n",
       "\n",
       "                                            ideal_answer  \n",
       "0                                               Telegram  \n",
       "1      The fraction 4/16 is equivalent to 1/4 because...  \n",
       "2          He finished his meal and left the restaurant.  \n",
       "3      Oak tree: Plant\\n Copper ore: Mineral\\n Elepha...  \n",
       "4      def factorialize(num):\\n  factorial = 1\\n  for...  \n",
       "...                                                  ...  \n",
       "20674  We can use a convolutional neural network (CNN...  \n",
       "20675         The sun was shining and the sky was clear.  \n",
       "20676                                     I eating cake.  \n",
       "20677  Michelle Obama is an inspirational woman who r...  \n",
       "20678  This legal document states that Company A has ...  \n",
       "\n",
       "[20679 rows x 3 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=df[df.context != \"\"].reset_index().drop(\"index\",axis=1)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93121e6d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "20bd2a49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/sahil/Documents/GitHub/Foundation-models/App_Evaluation/IBM_Gen_Al_Evaluation\n"
     ]
    }
   ],
   "source": [
    "cd /Users/sahil/Documents/GitHub/Foundation-models/App_Evaluation/IBM_Gen_Al_Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "70dc25da",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(101, 3)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_excel(\"IBM_Gen_AI_Q_and_A_Consolidated.xlsx\")\n",
    "df = df[[\"Question\",\"passage 1\",\"Gold answer\"]]\n",
    "df.columns = [\"question\",\"context\",\"ideal_answer\"]\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "8f7dbf07",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>question</th>\n",
       "      <th>context</th>\n",
       "      <th>ideal_answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>How do I get ready for using Sourcing and Proc...</td>\n",
       "      <td>To implement the functions available in Sourci...</td>\n",
       "      <td>Activate the Scope Items in SAP Best Practices.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>What features does SAP Sourcing and Procument ...</td>\n",
       "      <td>SAP S/4HANA Sourcing and Procumenet offers the...</td>\n",
       "      <td>Pricing, Manage Teams and Responsibilities - P...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>What integration scenarios are available for S...</td>\n",
       "      <td>The following integration scenarios are suppor...</td>\n",
       "      <td>\\nIntegration of SAP S/4HANA Procurement Cloud...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Can I connect my SAP S/4HANA Cloud with extern...</td>\n",
       "      <td>Integration of the procurement functionality o...</td>\n",
       "      <td>Yes. Integration of the procurement functional...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>What does Central Procument do?</td>\n",
       "      <td>With Central Procurement, you can integrate yo...</td>\n",
       "      <td>Integrate your SAP S/4HANA Cloud system with o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>When do I use static rounding?</td>\n",
       "      <td>A static rounding profile comprises threshold ...</td>\n",
       "      <td>Static rounding round up to a multiple of a un...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>How can I adapt existing fields in SAP S/4HANA...</td>\n",
       "      <td>You can change the layout of tables and forms ...</td>\n",
       "      <td>You can change the layout of tables and forms ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>How can I create custom logic for the BAdIs in...</td>\n",
       "      <td>Many business objects have enhancement spots (...</td>\n",
       "      <td>In the Custom Logic app, you can create custom...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>Which information is avaiblable on the Purchas...</td>\n",
       "      <td>- Shows you the overall purchasing spend, calc...</td>\n",
       "      <td>It shows you the overall purchasing spend, cal...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>How can I change a price relating to a purchas...</td>\n",
       "      <td>You can change a price relating to a purchase ...</td>\n",
       "      <td>You can change a price relating to a purchase ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>101 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              question  \\\n",
       "0    How do I get ready for using Sourcing and Proc...   \n",
       "1    What features does SAP Sourcing and Procument ...   \n",
       "2    What integration scenarios are available for S...   \n",
       "3    Can I connect my SAP S/4HANA Cloud with extern...   \n",
       "4                      What does Central Procument do?   \n",
       "..                                                 ...   \n",
       "96                     When do I use static rounding?    \n",
       "97   How can I adapt existing fields in SAP S/4HANA...   \n",
       "98   How can I create custom logic for the BAdIs in...   \n",
       "99   Which information is avaiblable on the Purchas...   \n",
       "100  How can I change a price relating to a purchas...   \n",
       "\n",
       "                                               context  \\\n",
       "0    To implement the functions available in Sourci...   \n",
       "1    SAP S/4HANA Sourcing and Procumenet offers the...   \n",
       "2    The following integration scenarios are suppor...   \n",
       "3    Integration of the procurement functionality o...   \n",
       "4    With Central Procurement, you can integrate yo...   \n",
       "..                                                 ...   \n",
       "96   A static rounding profile comprises threshold ...   \n",
       "97   You can change the layout of tables and forms ...   \n",
       "98   Many business objects have enhancement spots (...   \n",
       "99   - Shows you the overall purchasing spend, calc...   \n",
       "100  You can change a price relating to a purchase ...   \n",
       "\n",
       "                                          ideal_answer  \n",
       "0      Activate the Scope Items in SAP Best Practices.  \n",
       "1    Pricing, Manage Teams and Responsibilities - P...  \n",
       "2    \\nIntegration of SAP S/4HANA Procurement Cloud...  \n",
       "3    Yes. Integration of the procurement functional...  \n",
       "4    Integrate your SAP S/4HANA Cloud system with o...  \n",
       "..                                                 ...  \n",
       "96   Static rounding round up to a multiple of a un...  \n",
       "97   You can change the layout of tables and forms ...  \n",
       "98   In the Custom Logic app, you can create custom...  \n",
       "99   It shows you the overall purchasing spend, cal...  \n",
       "100  You can change a price relating to a purchase ...  \n",
       "\n",
       "[101 rows x 3 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
